# Code for the Monte-Carlo analysis

# December 9th, 2020

# So, we need 4 daily (within-trip) behavioral models to explain the point.
# 1. Homogeneous, no freshness deterioration model. This model is to explain what "trip level frontier" means.
# There is no "true model" for the trip level frontier, as we specify that it is generated from the daily level behavior. Hence, the trip level frontier is something aggregated production of daily level catch IF the exogenous effort assumption is true, (which is not).  We run this model and estimate with the trip level data to get the "true" parameter of trip level production. 

# 2. Homogeneous, freshness deterioration
# This is the true model we estimated in the main section. Hence, we assume that this is the true DGP, and see what is going to happen if we run trip-level frontier analysis to the data generated by this model.
# There may be two points: the estimated coefficient on the effort (a day at sea) is "biased", relative to the "true" effort coefficient in model 1. Also, evaluation using the estimated frontier may be correlated with the freshness-loss. Why?
  
# 3. Heterogenous, no freshness deterioration
# Difference fishing technology/skill, but trip duration is endogenously determined. 

# 4. Heterogeneous, freshness deterioration 
# This is based on the reviewer's suggestion. Even though there exist freshness loss aversion and associated bias, there still may be inefficiencies. Heterogeneity is in the ability to catch, so we model it by differing the expected daily amount of catches by vessels, but daily stochasticity remains. 


# library
library(tidyverse)
library(frontier)
library(patchwork)
library(knitr)
library(kableExtra)

# parameters
  n_ves = 20   # number of vessels
  n_trip = 100  # number of trips per vessels

  price = 2
  fuel_p = 0

  theta21 = 0.005 # freshness parameter

  # parameter for catch transition
  prob0_1 = 0.6  # catch is 1 if the previous catch is 0
  prob1_1 = 0.7  # catch is 1 if the previous catch is 1

  # parameters for "bad" catch transition
  prob0_2 = 0.6  # catch is 1 if the previous catch is 0
  prob1_2 = 0.6  # catch is 1 if the previous catch is 1

# --- Catch process ----
# Define function to express the catch
  # Input: expected catch, which is the catch on previous day
  #        To express the lag dependent process, the catch probability depends on the past catch
  # Output: Realized catch, 0 or 1.
# if the previous day catch is 0, today's catch is 1 with prob = 0.3, and catch is 0 with prob = .7
# if the previous day catch is 1, today's catch is 1 with prob = 0.7, and catch is 0 with prob = .3
catch_fun = function(c_exp){
  ##if(c_exp == 0){
   a = sample(0:1, length(c_exp), replace = TRUE, prob = c(1-prob0_1,prob0_1))##}
  ##else if(c_exp == 1){
   b = sample(0:1, length(c_exp), replace = TRUE, prob = c(1-prob1_1,prob1_1))##}
 ##else{
  ##  NA
##  }

 ifelse(c_exp == 0, a, ifelse(c_exp == 1, b, NA))
}

# "bad" catch transition
  # This function is used for heterogenous scenario
  # Some vessels (vessels with even number) are "bad" at catching,
  # which is expressed as lower probability of catch
  # We also use the probability for beliefs (i.e. vessels know correct catch transition)
catch_fun_bad = function(c_exp){
  a = sample(0:1, length(c_exp), replace = TRUE, prob = c(1-prob0_2,prob0_2))
  b = sample(0:1, length(c_exp), replace = TRUE, prob = c(1-prob1_2,prob1_2))
  ifelse(c_exp == 0, a, ifelse(c_exp == 1, b, NA))
}

# ======== Generate data: Scenario 1 ========
 # The trip days are endogenously determined. 

 # Exogenous days: The Scenario 1, the days of catch is determined 
 exo_days = expand.grid(ves = 1:n_ves, trip = 1:n_trip) %>% 
            mutate(trip_day = sample(20:50, n(), replace = TRUE))

 # Generate data
 data = expand.grid(ves = 1:n_ves, trip = 1:n_trip, day = 1:50) %>%
   arrange(ves,trip,day) %>%
   left_join(exo_days, by = c("ves","trip")) %>%
   # delete the rows that days is greater than the exogenous trip days
   ## filter(day <= trip_day) %>%
   mutate(cont_1= ifelse(day <= trip_day, 1, 0),
          catch_exp = ifelse(day == 1, 1, NA),  # expect 1 on the first day
          catch2_exp = ifelse(day == 1, 1, NA),  # expect 1 on the first day
          catch = NA,
          catch2 = NA)

# funciton of taking lag
##lag_func = function(var) {ifelse(dplyr::lag(var,1) == 0, 1, dplyr::lag(var,1))}

# GDP: from day 1 to day 50
for(i in 1:50){
  data = data %>%
    group_by(ves,trip) %>%
    mutate(catch = ifelse(day == i, catch_fun(catch_exp), catch),
           catch_exp = ifelse(day == i+1, lag(catch,1),catch_exp),
           # heterogeneous case
           catch2 = ifelse(day == i & (ves %% 2) == 0, catch_fun_bad(catch2_exp), 
                           ifelse(day == i & (ves %% 2) == 1, catch_fun(catch2_exp), 
                                  catch2)),
           catch2_exp = ifelse(day == i+1, lag(catch2,1),catch2_exp),
           ) %>%
    ungroup()
}

## summary(data)
data = data %>%
  group_by(ves,trip) %>%
  mutate(cum_catch = cumsum(catch)-catch,
         catch_day = catch*day,
         cum_catch_day = cumsum(catch_day)-catch_day) %>%
  ungroup()
## ggplot(data, aes(x = catch_exp,y = catch)) + geom_jitter(size = 0.4)

# aggregate the data at trip level
  
data_trip = data %>%
  filter(cont_1 == 1) %>%
  group_by(ves,trip) %>%
  summarise(trip_day = max(trip_day),
            catch = sum(catch)) %>%
  ungroup()

# DEA
x <- as.matrix(data_trip$trip_day)
y <- as.matrix(data_trip$catch)

Benchmarking::dea.plot.frontier(x,y,RTS="drs")

summary(Benchmarking::sfa(x,y))

x1 <- as.matrix(data_trip[data_trip$catch <= 41,]$trip_day)
y1 <- as.matrix(data_trip[data_trip$catch <= 41,]$catch)

summary(Benchmarking::sfa(x1,y1))

# ================================================================
# 2. Homogeneous, with freshness
# ================================================================

# The parameters are arbitrarily chosen to ensure that the state transitions probabilities are plausible:
# Data Generation Process

beta = 1    # discount factor

# B. define utility

# Define myopic utility 
# input：state S, parameter, transition probabilityパラメータ、状態推移確率
# output: utility corresponding to S 


myopic_util=function(X, params){
  # M: number of states
  
  price = params[1]
  fuel_p = params[2]
  theta21 = params[3]
  
  
  # M x 1 vector：continuation utility and return utility
  cont_util = -theta21*(X[,2]-20)*X[,1] # -theta21*(day-20)*H20
  return_util = price*(X[,1]+X[,3])   # price*(H20+cum_catch)
  
  # M x 2
  ##cbind(maint_cost,repl_cost)
  cbind(cont_util,return_util)
}


# B.2. Transition probabilities matrix

mat_spe0 = expand.grid(h20 = 0:20, day= 21:50, ad = 0:30, prev = 0:1) %>%
  arrange(h20,day,ad,prev)

mat_spe = expand.grid(day= 21:50,ad = 0:30, prev = 0:1) %>%
  arrange(day,ad,prev)

mat_temp = matrix(nrow = nrow(mat_spe), ncol = nrow(mat_spe))
mat_temp2 = matrix(nrow = nrow(mat_spe), ncol = nrow(mat_spe)) # for "bad" production process

for(k in 1:nrow(mat_spe)){
  for(kk in 1:nrow(mat_spe)){
    if(mat_spe[kk, "day"] == mat_spe[k,"day"]+1){ # for sure next day is + 1
      if(mat_spe[k, "prev"] == 0){ # additional catch prob depends on the prev catch
        # the case catch is 1 (additional catch is +1)
        if(mat_spe[kk, "ad"] == mat_spe[k, "ad"]+1 & mat_spe[kk, "prev"] == 1){
          mat_temp[k,kk] = prob0_1
          mat_temp2[k,kk] = prob0_2
        # the case catch is zero (additional catch is same as before)
        }else if(mat_spe[kk, "ad"] == mat_spe[k, "ad"] & mat_spe[kk, "prev"] == 0){
          mat_temp[k,kk] = 1 - prob0_1
          mat_temp2[k,kk] = 1 - prob0_2
          }
      }else if(mat_spe[k, "prev"] == 1){
        if(mat_spe[kk, "ad"] == mat_spe[k, "ad"]+1 & mat_spe[kk, "prev"] == 1){
          mat_temp[k,kk] = prob1_1
          mat_temp2[k,kk] = prob1_2
        }else if(mat_spe[kk, "ad"] == mat_spe[k, "ad"] & mat_spe[kk, "prev"] == 0){
          mat_temp[k,kk] = 1 - prob1_1
          mat_temp2[k,kk] = 1 - prob1_2
        }
      }
    }
  }
}


# make large block diagonal matrix
# make list of transition matricies, then make them into a large block diag matrix
# because H20 is unchanged within a trip, so the transition is only within the same H20.
# Each block is corresponding to each value of H20.

 lits_diag = list()
 lits2_diag = list()
 for(h20 in 0:20){
   lits_diag[[h20+1]] = mat_temp
   lits2_diag[[h20+1]] = mat_temp2
   h20 = h20 + 1
 }
 
 mat_trans = Matrix::bdiag(lits_diag)
 mat_trans2 = Matrix::bdiag(lits2_diag)
 mat_trans[is.na(mat_trans)] <- 0       # replace NA with 0
 mat_trans2[is.na(mat_trans2)] <- 0     # replace NA with 0

# C. define choice probabilities

# choice_prob function takes array corresponding to states
# util_array is matrix of 2 columns and S rows
 
choice_prob=function(util_array){
  
  # Returns the probability of each choice, conditional on an array of state/decision costs.
  
  M = nrow(util_array) # number of state
  v = util_array-apply(util_array,1,min) #normalization: take the difference since 1) results are the same 2) more stable with exp()
  ev = exp(v)
  pchoice = ev/rowSums(ev)　# logit probabilities exp(u0)/(exp(u0)+exp(u1))
  
  pchoice
}

# D. Contraction Mapping Algorithm
# (generate the forward-looking choice probabilities)

contraction_mapping=function(M, X, mat_trans, params, beta=1, threshold=1e-6, suppr_output=FALSE){
  
  # compute dynamic expected values 
  # Compute the matrix of values corresponding to the choices and states


  # Output
  # each row is state
  # Column 1 is forward-looking
  # Column 2 is myopic
  
  achieved = TRUE
  

  
  ##lp = length(p) 
  
  # ST_matの作成。pによって決まる。要素[i,i+j]はiが現在の状態で、次の時期にi+jに移る確率
  ##for(i in 1:M){
  ##  for(j in 1:lp){
  ##    if((i+j-1)<M)  F0_mat[i,i+j-1] = p[j] # ST_mat[i,i]はp[1]、すなわち状態が遷移しない確率
  ##    if((i+j-1)==M) F0_mat[i,i+j-1] = sum(p[j:lp]) #列がこれ以上ない状態なので、それ以上走行距離が伸びないと仮定。
  ##  }
  ##}
  
  # 状態遷移確率
  # Poisson probability distribution
  # Transpose because expectation lambda is row, next period catch is column
  ## ST_mat = t(sapply(c(1:10),dpois_bnd,x = 0:10))
  
  
  # initialize expected value
  k = 0 # initialize the contraction mapping loop 
  EV = matrix(0,21*30*31*2,2) # Mx2
  
  # initial value: same in myopic and EV_new 
  EV_myopic = EV_new = myopic_util(X, params) 
  
  print(max(abs(EV_new-EV)) > threshold)
  # Contraction mapping loop
  while(max(abs(EV_new-EV)) > threshold){
    print(k)
    # Store the former expected value
    EV = EV_new # Mx2
    # Obtained the probability of maintenance and replacement from the former expected value
    pchoice = choice_prob(EV) # Mx2
    # Compute the expected cost for each state: Nx1 vector
    ecost = rowSums(pchoice*EV) # Mx1
    # Compute the two components of forward-looking utility: In case of maintenance, 
    # utility of future states weighted by transition probabilities. In case of replacement,
    # the future utility is the utility of state 0
    futil_maint = mat_trans%*%ecost # F0_mat%*%ecost  # if maintain, the uture utility depends on the state probability
    futil_repl = 0 ## F1_mat%*%ecost    # if replace, the future utility is state 0
    # futil_repl or "return" is zero future utility, because stopping.
    futil = cbind(futil_maint,futil_repl)
    # Future utility is discounted by beta, and added to the myopic cost. 
    EV_new = EV_myopic + beta*futil
    k = k+1
    if(k == 1000) achieved = FALSE
  }
  
  if(!suppr_output){
    if(achieved){
      cat("Convergence achieved in ",k," iterations")
    } else {
      cat("CM could not converge! Mean difference = ",round(mean(EV_new-EV),2))
    }
  }
  
  # return both dynamic and myopic cases
  list(EV_new=EV_new, CP_forward=choice_prob(EV_new),EV_myopic = EV_myopic,CP_myopic=choice_prob(EV_myopic))
}

# ======== IV. Generation of simulated data  ====================

# 1) True coefficients

# Here, we use the same coefficients as the one described earlier:


# Finally, we assume that there are 70 discrete states for the bus' mileage 
# (i.e. no bus has a mileage greater than 345,000 in the dataset).

# 2) Choice probabilities, as a function of the bus' state:

# Using the Contraction Mapping algorithm implemented earlier, we can obtain the 
# probabibility of maintenance as a function of the bus' state for a forward-looking
# agent and a myopic agent.


params_1 = c(price,fuel_p,theta21)

M = nrow(mat_spe0) # dimension of states
X = mat_spe0

# use true parameters to compute the values by contraction mapping
out1 = contraction_mapping(M=M, X=X,mat_trans = mat_trans, 
                          params=params_1, threshold=1e-6, beta = 1)
out2 = contraction_mapping(M=M, X=X,mat_trans = mat_trans2, 
                           params=params_1, threshold=1e-6, beta = 1)
lin_forward=out1$CP_forward # dynamic
lin_myopic=out1$CP_myopic   # myopic
pchoice = lin_forward[,1]


mat_spe2 = cbind(mat_spe0,
                 CP_forward_good = out1$CP_forward[,1],
                 CP_myopic_good = out1$CP_myopic[,1],
                 CP_forward_bad = out2$CP_forward[,1],
                 CP_myopic_bad = out2$CP_myopic[,1])

plot(mat_spe2$day,mat_spe2$CP_forward_good)

# 3 dimenstion: day, H20 and probability to continue
ggplot(mat_spe2, aes(x = day, y = h20, col = CP_forward_good)) + 
  geom_jitter(size = 0.1) + 
  scale_color_viridis_c() +
  labs(x = "Trip Days", y = "Total catch in the first 20 days",
       col = "Probability of\n continuation") +
  theme_bw()

# 3)　Data generation

# use the same catch data as the scenario 1

data_h20 = data %>%
  group_by(ves,trip) %>%
  mutate(h20 = cumsum(catch),
         h20_hetero = ifelse(ves %% 2 == 1,
                               cumsum(catch),
                               cumsum(catch2))) %>%
  ungroup() %>%
  filter(day==20) %>%
  select(ves,trip,h20,h20_hetero)

data2 = data %>% 
  left_join(data_h20, by = c("ves","trip")) %>%
  group_by(ves,trip) %>%
  mutate(cum_catch = cumsum(catch),
         add_catch = cum_catch - h20,
         cum_catch_hetero = ifelse(ves %% 2 == 1,
                                   cumsum(catch),
                                   cumsum(catch2)),
         add_catch_hetero = cum_catch_hetero - h20) %>%
  ungroup() %>%
  left_join(mat_spe2, by = c("h20" = "h20","day" = "day",
                             "add_catch" ="ad","catch_exp"="prev")) %>%
  # draw uniform dist, and "return" if the number is greater the probability
  group_by(ves,trip) %>%
  mutate(ret1 = (CP_forward_good < runif(n()))*1,  # good production
         ret1 = ifelse(is.na(ret1),0,ret1),
         ret2 = cumsum(ret1),
         ret3 = (ret2 > 0)*1,
         ret1_hetero = ifelse(ves %% 2 == 1, # if odd number 
                       (CP_forward_good < runif(n()))*1,  # good production
                       (CP_forward_bad < runif(n()))*1),  # bad production if even number vessel 
         ret1_hetero = ifelse(is.na(ret1_hetero),0,ret1_hetero),
         ret2_hetero = cumsum(ret1_hetero),
         ret3_hetero = (ret2_hetero > 0)*1) %>%
  ungroup() %>%
  mutate(fresh_loss = (theta21/2)*(day-20)^2*h20/(price*cum_catch))

# scenario 2 trip data
# add_catch and h20 for scenario 1 and 3
# for scenario 3, use cum_catch_hetero and add_catch_hetero
data_trip_sec1 = data2 %>% 
  filter(day == trip_day)

ggplot(data_trip_sec1, aes(x = day, y = cum_catch)) + 
  geom_jitter()

data_trip_sec2 = data2 %>%
  filter((ret1 == 1 & ret2 == 1 & ret3 == 1))

ggplot(data_trip_sec2, aes(x = day, y = cum_catch)) + 
  geom_jitter()

data_trip_sec2_hetero = data2 %>%
  filter((ret1_hetero == 1 & ret2_hetero == 1 & ret3_hetero == 1))

ggplot(data_trip_sec2_hetero, aes(x = day, y = cum_catch_hetero)) + 
  geom_jitter()
#************************************************************************
#*#************************************************************************
#************************************************************************
#* To pass the data to the main file (daily_manuscript_ver7_Revision.Rmd), 
#* save the generated data file. 
write_csv(mat_spe2, paste0("mat_spe2_",format(Sys.Date()),".csv"))
write_csv(data_trip_sec1,paste0("data_trip_sec1_",format(Sys.Date()),".csv"))
write_csv(data_trip_sec2,paste0("data_trip_sec2_",format(Sys.Date()),".csv"))
write_csv(data_trip_sec2_hetero,paste0("data_trip_sec2_hetero_",format(Sys.Date()),".csv"))




# -------------------- Frontier with DEA (for appendix) -----------------------
# data of efficiencies evaluate by each frontier (sec1 data for sec1 frontier, sec2 data for sec2 frontier)
data_eff_dea_dist_homo = bind_rows(data_trip_sec1_eff[,c("ves","trip","day","efficiency")] %>% 
                                 mutate(data = "Sec.1", frontier = "Sec.1"),
                               data_trip_sec2_eff[,c("ves","trip","day","eff_dea_sec2_homo")] %>% 
                                 rename("efficiency" = "eff_dea_sec2_homo") %>%
                                 mutate(data = "Sec.2", frontier = "Sec.2")
)

data_eff_dea_dist_hetero = bind_rows(data_trip_sec1_eff[,c("ves","trip","day","efficiency.1")] %>% 
                                   rename("efficiency" = "efficiency.1") %>% 
                                   mutate(data = "Sec.1", frontier = "Sec.1"),
                                 data_trip_sec2_eff_hetero[,c("ves","trip","day","eff_dea_sec2_hetero")] %>% 
                                   rename("efficiency" = "eff_dea_sec2_hetero") %>% 
                                   mutate(data = "Sec.2", frontier = "Sec.2")
)

# plot: distribution comparison by DEA
plot_eff_dea_dist_homo = ggplot(data_eff_dea_dist_homo, aes(x = efficiency, col = data)) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,8) + 
  xlim(0.2,1.00) +
  scale_linetype_discrete(labels = c("Planned","Endogeneous")) + 
  scale_color_discrete(labels = c("Planned","Endogeneous")) +
  labs(title = "Homogeneous technology", x = "Efficiency by DEA", y = "Density",
       col = "Data") +
  theme_bw() + 
  theme(legend.position = "bottom")

plot_eff_dea_dist_hetero = ggplot(data_eff_dea_dist_hetero, aes(x = efficiency, col = data)) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,8) + 
  xlim(0.2,1.00) + 
  scale_linetype_discrete(labels = c("Planned","Endogeneous")) + 
  scale_color_discrete(labels = c("Planned","Endogeneous")) +
  labs(title = "Heterogeneous technology", x = "Efficiency by DEA", y = "Density",
       col = "Data")+ 
  theme_bw()+ 
  theme(legend.position = "bottom")

# ***** Plot used in the appendix *****
plot_eff_dea_dist_homo + plot_eff_dea_dist_hetero + plot_layout(guides = 'collect') &
  theme(legend.position='bottom')

# C. Export of the data

# We only need the decision state and the choice, as the other variables will not be used in the estimation section.


